﻿//
// Author: Ruxo Zheng (http://ruxozheng.spaces.live.com/)
//
// This file is distributed under CPOL 1.0 License (http://www.codeproject.com/info/cpol10.aspx).
//

using System;
using System.Diagnostics;

namespace RZ.Web
{
    /// <summary>
    /// Forward-only reader over a piece of HTML text. Content items are fetched one at a time
    /// (<see cref="FetchNextContent"/>), and the tag at the current position can be collected,
    /// together with everything up to its close tag, into a block (<see cref="GrabCurrentTag"/>).
    /// </summary>
    public sealed class HtmlParser
    {
        // Names of the open tags that have been read but not closed yet.
        readonly HtmlTagStack tagStack = new HtmlTagStack();

        // Shared by the regular parser and by the script parser created in GetScriptContent.
        readonly HtmlLegacyLexer lexer;

        // Produces the regular (non-script) content items.
        readonly IHtmlParser parser;

        // Item returned by the latest FetchNextContent call; null until the first fetch.
        HtmlContent currentContent;

        /// <summary>
        /// Creates a parser over the given HTML text.
        /// </summary>
        /// <param name="htmlContent">
        /// The HTML text to read. Checked for null through <c>CodeValidator.ArgumentValidIf</c>
        /// (presumably throws an argument exception on failure; confirm against CodeValidator).
        /// </param>
        public HtmlParser(String htmlContent)
        {
            CodeValidator.ArgumentValidIf(htmlContent != null);

            this.lexer = new HtmlLegacyLexer(htmlContent);
            this.parser = new HtmlLegacyParser(this.lexer);
        }

        /// <summary>
        /// The content item returned by the most recent <see cref="FetchNextContent"/> call,
        /// or null when nothing has been fetched yet.
        /// </summary>
        public HtmlContent CurrentContent
        {
            get { return this.currentContent; }
        }

        /// <summary>
        /// Mirrors the lexer's end-of-content flag.
        /// </summary>
        public bool EndOfContent
        {
            get { return this.lexer.EOC; }
        }

        /// <summary>
        /// Advances until a head tag with the given name becomes the current content.
        /// </summary>
        /// <param name="tagName">Tag name to look for.</param>
        /// <returns>
        /// true when such a tag is now the current content; false when the content ran out first.
        /// </returns>
        public Boolean MoveToTag(string tagName)
        {
            while (MoveToHeadTag())
            {
                // MoveToHeadTag only returns true when the current content is a head tag.
                var headTag = (HtmlContentHeadTag)this.CurrentContent;

                // NOTE(review): the comparison is case-sensitive; whether tag names are
                // case-normalized by the lexer is not visible here - confirm.
                if (headTag.TagName == tagName)
                    return true;
            }

            return false;
        }

        /// <summary>
        /// Advances until the current content is a head tag.
        /// </summary>
        /// <returns>true when a head tag was reached; false when the content ran out first.</returns>
        public Boolean MoveToHeadTag()
        {
            while (!EndOfContent)
            {
                if (FetchNextContent() is HtmlContentHeadTag)
                    return true;
            }

            return false;
        }

        /// <summary>
        /// Collects the current tag and the content that follows it, up to the tag's close tag,
        /// into a block. Nested tags are collected recursively as inner blocks.
        /// </summary>
        /// <param name="hasMatch">
        /// true when the block was closed by its own close tag. false when the block was ended by
        /// a close tag that closes an enclosing tag instead; the block is then force-closed and
        /// the caller is expected to take over its inner blocks.
        /// </param>
        /// <returns>The collected block.</returns>
        /// <exception cref="InvalidOperationException">If this is called after all content has been read.</exception>
        /// <exception cref="HtmlParserException">If the content ends before the tag block is closed.</exception>
        public HtmlContentBlock GrabCurrentTag(out Boolean hasMatch)
        {
            if (EndOfContent)
                throw new InvalidOperationException("The content is end.");

            // Kept only for error reporting when the block turns out to be incomplete.
            Int32 currentCursor = this.lexer.Cursor;

            // NOTE(review): if the current content is not a head tag, the block is created
            // from null; whether HtmlContentBlock accepts that is not visible here - confirm.
            var block = new HtmlContentBlock(this.CurrentContent as HtmlContentHeadTag);

            // Stack depth while this block's tag is open; a smaller depth later on means a
            // close tag has closed this block or one of its ancestors.
            Int32 stackIndex = this.tagStack.Count;

            Debug.Assert(stackIndex > 0);

            while (!EndOfContent && !block.IsClosed)
            {
                var contentText = FetchNextContent() as HtmlContentText;

                if (contentText != null)
                {
                    block.Add(contentText);
                    continue;
                }

                Boolean closeThisBlock = false;

                if (this.tagStack.Count < stackIndex)   // this means some block in stack is just closed.
                {
                    closeThisBlock = true;
                }
                else if (this.CurrentContent is HtmlContentCloseTag)
                {
                    Debug.WriteLine("Unmatched close tag found!");
                    continue;   // This is extra close tag (unmatched pair), discard it.
                }
                else
                {
                    // Nothing is left that could close this block. Recursing here would fail
                    // with InvalidOperationException from the guard above, hiding the real
                    // problem; leave the loop so the incomplete block is reported instead.
                    if (EndOfContent)
                        break;

                    Boolean innerHasMatch;
                    var innerBlock = GrabCurrentTag(out innerHasMatch);

                    block.Add(innerBlock);

                    if (!innerHasMatch)
                    {
                        // innerBlock must be added into block before moving inner content, otherwise the tag order will be wrong.
                        innerBlock.MoveInnerBlockTo(block);

                        closeThisBlock = (this.CurrentContent is HtmlContentCloseTag);
                    }
                }

                if (!closeThisBlock)
                    continue;

                if (this.tagStack.Count == stackIndex - 1)    // this is my block
                {
                    block.Close((HtmlContentCloseTag)this.CurrentContent);
                }
                else
                {
                    // The close tag belongs to an enclosing block; hand control back to it.
                    hasMatch = false;
                    block.ForceClose(); // no unmatched close found...
                    return block;
                }
            }

            if (!block.IsClosed)
                throw new HtmlParserException("Incomplete tag block!", currentCursor, this.lexer.Content);

            hasMatch = true;
            return block;
        }

        /// <summary>
        /// Reads the next content item and makes it the current content. Directly after a
        /// script head tag the script body is read as a single text item; in every other case
        /// the regular parser supplies the item and the open-tag stack is updated for it.
        /// </summary>
        /// <returns>The new current content.</returns>
        /// <exception cref="NotSupportedException">If a script tag declares a language other than JavaScript.</exception>
        public HtmlContent FetchNextContent()
        {
            if (IsScriptTag(this.CurrentContent as HtmlContentHeadTag))
                this.currentContent = GetScriptContent();
            else
            {
                this.currentContent = this.parser.GetNextContent();

                SyncCurrentContentWithTagStack();
            }

            return this.currentContent;
        }

        /// <summary>
        /// Tells whether the given tag is a script tag. A null tag is not.
        /// </summary>
        static Boolean IsScriptTag(HtmlContentHeadTag tag)
        {
            // NOTE(review): case-sensitive match; relies on tag names arriving in lower case - confirm.
            return tag != null && (tag.TagName == "script");
        }

        /// <summary>
        /// Reads the body of the script tag that is the current content.
        /// </summary>
        /// <returns>
        /// An empty text item when the tag reports itself as closed; otherwise the item produced
        /// by the JavaScript content parser.
        /// </returns>
        /// <exception cref="NotSupportedException">If the "language" attribute names something other than JavaScript.</exception>
        HtmlContentText GetScriptContent()
        {
            var tag = (HtmlContentHeadTag)this.currentContent;

            if (tag.IsClosed)
                return new HtmlContentText(String.Empty);

            // A missing or blank "language" attribute is treated as JavaScript.
            String language = tag.Attributes["language"];
            language = (language == null) ? String.Empty : language.Trim();

            if (language.Length == 0)
                language = "javascript";

            // Prefix match, so versioned values that start with "javascript" are accepted too.
            // Ordinal comparison: this is a machine identifier, not linguistic text.
            if (!language.StartsWith("javascript", StringComparison.OrdinalIgnoreCase))
                throw new NotSupportedException(language + " script is not yet supported.");

            IHtmlParser scriptParser = new HtmlJScriptContentParser(this.lexer);

            return (HtmlContentText)scriptParser.GetNextContent();
        }

        /// <summary>
        /// Keeps the open-tag stack in step with the current content: an open tag is pushed, a
        /// close tag removes its latest matching entry. Any other content leaves the stack alone.
        /// </summary>
        void SyncCurrentContentWithTagStack()
        {
            var openTag = this.currentContent as HtmlContentOpenTag;

            if (openTag != null)
            {
                this.tagStack.Push(openTag.TagName);
                return;
            }

            var closeTag = this.currentContent as HtmlContentCloseTag;

            // Neither an open nor a close tag, or nothing is open: nothing to update.
            if (closeTag == null || this.tagStack.Count == 0)
                return;

            Int32 closeTagIndex = this.tagStack.LastIndexOf(closeTag.TagName);

            // NPos means the close tag matches nothing that is open; the stack is left as is.
            // RemoveFrom presumably drops the matching entry and everything pushed after it - confirm.
            if (closeTagIndex != HtmlTagStack.NPos)
                this.tagStack.RemoveFrom(closeTagIndex);
        }
    }
}
